Testing primers against PR2

1 Goal

  • Display primer set matches against the PR2 database (latest version 4.12.0)

  • Matches are computed by an independent R script (PR2 Primers pr2_match.R) and stored in an rda file which is read by this Rmd file.

  • In this version # of mismatches is computed as well as position of mismatches

1.1 To do

  • Check why there are some negative amplicon (probably some matches ahead - Put a condition that amplicon should be >)

2 Initialize

Load the variables common to the different scripts and the necessary libraries

# Libraries for bioinfo ----------------------------------------------------

  library("Biostrings")

# Libraries tidyr ---------------------------------------------------------

  library("ggplot2")
  theme_set(theme_light())
  library("dplyr")   
  library("readxl")
  library(openxlsx)
  library("tibble")
  library("readr")
  library("purrr")
  library("forcats")
  library("lubridate")
  library("stringr")

# Libraries dvutils and pr2database -------------------------------------------------------
  if(any(grepl("package:dvutils", search()))) detach("package:dvutils", unload=TRUE)
  library("dvutils")

  # if(any(grepl("package:pr2database", search()))) detach("package:pr2database", unload=TRUE)
  # library("pr2database")

# Options for knitting the report -------------  
  
  library(knitr)
  library(rmdformats)
  library(kableExtra) 

  knitr::opts_chunk$set(fig.width=8, 
                        fig.height=6, 
                        eval=TRUE, 
                        cache=TRUE,
                        echo=TRUE,
                        prompt=FALSE,
                        tidy=TRUE,
                        comment=NA,
                        message=FALSE,
                        warning=FALSE)
  opts_knit$set(width=90)
  options(max.print="500")  
  options(knitr.kable.NA = '')

3 Constants

# gene_selected = '16S rRNA'

# for Geisen paper

max_mismatch = 2
gene_selected = "18S_rRNA"  # Can be 18S_rRNA or 16_rRNA
rda_file_label = "_all"  # This label is added at the end of the rda file name
gene_regions = c("V4", "V9")

primer_sets_excluded = c(73, 74)  # These primers should not be included (semi nested PCR)

sequence_length_min = 1500
sequence_length_min_V9 = 1650
sequence_length_min_V4 = 1200

4 Read primer file

4.1 Build the primer set Table

Only keep the 18S primers with V region

# Read from local database
pr2_db <- db_info("pr2_google")
pr2_db_con <- db_connect(pr2_db)

primers <- tbl(pr2_db_con, "pr2_primers") %>% collect()
primer_sets_all <- tbl(pr2_db_con, "pr2_primer_sets") %>% collect()

disconnect <- db_disconnect(pr2_db_con)

if (gene_selected == "18S_rRNA") {
    
    primer_sets <- primer_sets_all %>% filter(gene == "18S rRNA", !is.na(reference_doi), 
        !str_detect(gene_region, "ITS|cloning|full"), str_detect(gene_region, "^V"))  # Start with V
    
    
    gene_regions_all = unique(primer_sets$gene_region)
    print
    
} else if (gene_selected == "16S_rRNA") {
    primer_sets <- primer_sets %>% filter(specificity == "plastid" | (gene == "18S rRNA" & 
        specificity == "universal"))
}
function (x, ...) 
UseMethod("print")
<bytecode: 0x000000001587c5b0>
<environment: namespace:base>
primer_sets <- primer_sets %>% left_join(select(primers, primer_id, fwd_name = name, 
    fwd_seq = sequence, fwd_start_yeast = start_yeast, fwd_end_yeast = end_yeast), 
    by = c(fwd_id = "primer_id")) %>% left_join(select(primers, primer_id, rev_name = name, 
    rev_seq = sequence, rev_start_yeast = start_yeast, rev_end_yeast = end_yeast), 
    by = c(rev_id = "primer_id")) %>% mutate(length_yeast = rev_end_yeast - fwd_start_yeast + 
    1) %>% select(gene_region, specificity, primer_set_id, contains("fwd"), contains("rev"), 
    length_yeast, reference:remark) %>% select(-fwd_id, -rev_id) %>% arrange(gene_region, 
    fwd_start_yeast, rev_start_yeast) %>% filter(!(primer_set_id %in% primer_sets_excluded)) %>% 
    mutate(specific = ifelse(is.na(specificity), "general", "specific")) %>% relocate(specific, 
    .before = specificity)


# , lubridate::today()
file_name = str_c("../output/Table_primers_", gene_selected, ".xlsx")
write.xlsx(primer_sets, file = file_name, firstActiveRow = 2)

n_primers <- list()
n_primers[["general"]] <- nrow(filter(primer_sets, specific == "general"))
n_primers[["specific"]] <- nrow(filter(primer_sets, specific == "specific"))
n_primers
$general
[1] 33

$specific
[1] 22
knitr::kable(select(primer_sets, primer_set_id, fwd_name, rev_name, gene_region, 
    length_yeast)) %>% kableExtra::kable_styling()
primer_set_id fwd_name rev_name gene_region length_yeast
81 18SV1V2F 18SV1V2R V1-V2 341
80 F04 R22 V1-V2 402
72 Kineto_80 Kineto_651 V2-V3 667
59 152+ 528- V2-V3 352
69 Chryso_240 Chryso_651 V2-V3 545
84 SAR_V3_F SAR_V3_R V3 182
62 Cer2F Cer1R V3-V4 588
88 Par_18S-F Par_18S-R V3-V4 562
40 Uni18SF Uni18SR V4 461
13 3NDf V4_euk_R1 V4 465
14 3NDf V4_euk_R2 V4 458
12 3NDf 1132rmod V4 599
34 515FY NSR951 V4 391
35 EUK581-F EUK1134-R V4 578
33 515F Univ 926R V4 589
18 515F 1119r V4 598
4 563f 1132r V4 587
16 TAReuk454FWD1 V4 18S Next.Rev V4 417
7 V4_1f TAReukREV3 V4 417
8 TAReuk454FWD1 TAReukREV3 V4 417
36 TAReuk454FWD1 V4RB V4 417
86 EuF-V4 picoR2 V4 425
90 TAReuk454FWD1 V4r V4 417
19 Claudia Vannini (F) Claudia Vannini (R) V4 439
1 F-566 R-1200 V4 635
15 EUKAF EUKAR V4 410
17 E572F E1009R V4 438
25 NSF563 NSR951 V4 380
2 A-528F R-952 V4 379
39 A-528F PRYM01+7 V4 396
83 A-528F 1132r V4 577
3 574*f 1132r V4 576
77 574f 1132r V4 576
23 590F 1300R V4 720
21 D512for D978rev V4 437
41 Cerc479F Cerc750R V4 283
24 EK-565F-NGS EUK1134-R V4 527
65 S616F_Cerco S947R_Cerco V4 348
66 S616F_Eocer S947R_Cerco V4 348
63 S616F_Cerco S963R_Cerco V4 367
64 S616F_Eocer S963R_Cerco V4 367
5 616f 1132r V4 534
6 616*f 1132r V4 534
38 ChloroF ChloroR V5 494
37 DimA DimB V5 295
87 Oxy_18S-F Oxy_18S-R V5 444
32 926wF 1392-R V6-V8 513
76 F-1183 R-1443 V7 261
92 FF390 FR1 V7-V8 349
67 1301f 1801r V7-V9 539
89 V8f 1510R V8-V9 372
28 1380F 1510R V9 176
29 1389F 1510R V9 167
31 1388F 1510R V9 167
27 1391F EukB V9 169

5 Computing the matches

This part is done by an R script script_primers_pr2_match.R that is executed in batch mode on Roscoff server.

5.1 Programing Notes

  • Use Biostrings

Accessor methods : In the code snippets below, x is an MIndex object.

  • length(x): The number of patterns that matches are stored for.
  • names(x): The names of the patterns that matches are stored for.
  • startIndex(x): A list containing the starting positions of the matches for each pattern.
  • endIndex(x): A list containing the ending positions of the matches for each pattern.
  • elementNROWS(x): An integer vector containing the number of matches for each pattern.
  • x[[i]]: Extract the matches for the i-th pattern as an IRanges object.
  • unlist(x, recursive=TRUE, use.names=TRUE): Return all the matches in a single IRanges object. recursive and use.names are ignored.
  • extractAllMatches(subject, mindex): Return all the matches in a single XStringViews object.

One could also use another function which does not give the position * match_fwd <- vcountPattern(fwd, seq,max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=FALSE, algorithm=“auto”)

6 Load the primer match files

This avoids recomputing each time.

All sequences for which introns have been removed are filtered out (contain "_UC" in pr2_accession)

6.1 Build the file for all primer sets - DO ONLY ONCE

pr2_match_list <- list()

for (i in 1:nrow(primer_sets)) {
    cat("i = ", i, "\n")
    rda_file_label <- str_c(sprintf("_set_%03d", primer_sets$primer_set_id[i]), "_mismatches_", 
        max_mismatch)
    load(file = str_c("../output/individual primers/pr2_match_", gene_selected, rda_file_label, 
        ".rda"))
    pr2_match_list[[i]] <- pr2_match
}

pr2_match_final <- pr2_match_list %>% reduce(bind_rows)

saveRDS(pr2_match_final, file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", 
    max_mismatch, ".rds"))

6.2 Read the file for all primer sets and filter

# For testing load(file=str_c('../output/pr2_match_', gene_selected ,'_test_mismatches_', max_mismatch, '.rda'))

pr2_match_final <- readRDS(file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, ".rds"))

print(str_c("Before filtration: ", nrow(pr2_match_final)))
[1] "Before filtration: 8447679"
# Replace the gene_region and primer_label since they can be changed in the database
primer_sets_label <- primer_sets %>% mutate(primer_label = str_c(str_sub(gene_region, 1, 2), sprintf("%02d", primer_set_id), str_sub(str_replace_na(specificity, 
    replacement = ""), 1, 3), sep = "_")) %>% # Remove the last underscore if left by itself
mutate(primer_label = str_replace(primer_label, "_$", "")) %>% select(primer_set_id, primer_label, gene_region, specific, specificity)

pr2_match_final <- pr2_match_final %>% # Remove sequences for which the introns have been removed
filter(!str_detect(pr2_accession, "_UC")) %>% # Remove sequence that are shorter
filter((str_detect(gene_region, "V4") & sequence_length >= sequence_length_min_V4) | (str_detect(gene_region, "V9") & sequence_length >= 
    sequence_length_min_V9) | (!str_detect(gene_region, "V4|V9") & sequence_length >= sequence_length_min)) %>% # remove '_' from primer_labels
mutate(primer_label = str_replace_all(primer_label, "_", " "), mismatch_number = fwd_mismatch_number + rev_mismatch_number) %>% 
    # Only keep the selected primers
filter(primer_set_id %in% primer_sets$primer_set_id) %>% # Remove primer_label which is created in the line
select(-primer_label, -gene_region) %>% left_join(primer_sets_label)

print(str_c("After filtration: ", nrow(pr2_match_final)))
[1] "After filtration: 4975722"

7 Summarize the data in tables

7.1 Function

primer_summary <- function(pr2_match, taxo_level) {
    summary <- pr2_match %>% group_by({
        {
            taxo_level
        }
    }, gene_region, primer_label, primer_set_id, specific, specificity) %>% summarize(n_seq = n(), 
        fwd_number = sum(!is.na(fwd_pos)), fwd_pct = fwd_number/n_seq * 100, fwd_mismatch_0_pct = sum(fwd_mismatch_number == 
            0, na.rm = TRUE)/fwd_number * 100, fwd_mismatch_1_pct = sum(fwd_mismatch_number == 
            1, na.rm = TRUE)/fwd_number * 100, fwd_mismatch_2_pct = sum(fwd_mismatch_number == 
            2, na.rm = TRUE)/fwd_number * 100, fwd_mismatch_pos = mean(fwd_mismatch_primer_position_3prime, 
            na.rm = TRUE), rev_number = sum(!is.na(rev_pos)), rev_pct = rev_number/n_seq * 
            100, rev_mismatch_0_pct = sum(rev_mismatch_number == 0, na.rm = TRUE)/rev_number * 
            100, rev_mismatch_1_pct = sum(rev_mismatch_number == 1, na.rm = TRUE)/rev_number * 
            100, rev_mismatch_2_pct = sum(rev_mismatch_number == 2, na.rm = TRUE)/rev_number * 
            100, rev_mismatch_pos = mean(rev_mismatch_primer_position_3prime, na.rm = TRUE), 
        ampli_number = sum(!is.na(ampli_size)), ampli_pct = ampli_number/n_seq * 
            100, ampli_size_mean = mean(ampli_size, na.rm = TRUE), ampli_size_sd = sd(ampli_size, 
            na.rm = TRUE), ampli_size_max = max(ampli_size, na.rm = TRUE), ampli_size_min = min(ampli_size, 
            na.rm = TRUE), ampli_mismatch_0_pct = sum(mismatch_number == 0, na.rm = TRUE)/n_seq * 
            100, ampli_mismatch_1_pct = sum(mismatch_number == 1, na.rm = TRUE)/n_seq * 
            100, ampli_mismatch_2_pct = sum(mismatch_number == 2, na.rm = TRUE)/n_seq * 
            100, ampli_mismatch_3_pct = sum(mismatch_number == 3, na.rm = TRUE)/n_seq * 
            100, ampli_mismatch_4_pct = sum(mismatch_number == 4, na.rm = TRUE)/n_seq * 
            100, ampli_mismatch_5_pct = sum(is.na(mismatch_number), na.rm = TRUE)/n_seq * 
            100) %>% mutate(across(contains(c("mismatch", "mean")), ~ifelse(is.nan(.x), 
        NA, .x))) %>% mutate(across(contains(c("size")), ~ifelse(is.infinite(.x), 
        NA, .x))) %>% ungroup()
    
    # Compute the position of the primer for nearly complete sequences
    summary_pos <- pr2_match %>% filter(sequence_length >= sequence_length_min_V9) %>% 
        group_by({
            {
                taxo_level
            }
        }, gene_region, primer_label, primer_set_id) %>% summarize(fwd_pos_mean = mean(fwd_pos, 
        na.rm = TRUE), rev_pos_mean = mean(rev_pos, na.rm = TRUE)) %>% ungroup()
    
    summary <- summary %>% left_join(summary_pos)
    
}

7.2 Summarize all eukaryotes

pr2_match_summary_primer_set <- primer_summary(pr2_match_final, kingdom)

7.3 Summarize per supergroup

pr2_match_summary_primer_set_sg <- primer_summary(pr2_match_final, supergroup)

7.4 Summarize per class

pr2_match_summary_primer_set_class <- primer_summary(pr2_match_final, class)

pr2_taxo <- pr2_match_final %>% select(supergroup, division, class) %>% distinct()

pr2_match_summary_primer_set_class <- pr2_match_summary_primer_set_class %>% left_join(pr2_taxo) %>% 
    relocate(supergroup, division, .before = class)

7.5 Save summaries (also in shiny)

saveRDS(pr2_match_summary_primer_set, file = str_c("../output/pr2_match_", gene_selected, 
    "_mismatches_", max_mismatch, "_summary.rds"))
saveRDS(pr2_match_summary_primer_set_sg, file = str_c("../output/pr2_match_", gene_selected, 
    "_mismatches_", max_mismatch, "_summary_sg.rds"))
saveRDS(pr2_match_summary_primer_set_class, file = str_c("../output/pr2_match_", 
    gene_selected, "_mismatches_", max_mismatch, "_summary_class.rds"))

# Shiny

saveRDS(pr2_match_summary_primer_set_class, file = str_c("../shiny/pr2_match_", gene_selected, 
    "_mismatches_", max_mismatch, "_summary_class.rds"))

7.6 Read summaries

pr2_match_summary_primer_set <- readRDS(file = str_c("../output/pr2_match_", gene_selected, 
    "_mismatches_", max_mismatch, "_summary.rds"))
pr2_match_summary_primer_set_sg <- readRDS(file = str_c("../output/pr2_match_", gene_selected, 
    "_mismatches_", max_mismatch, "_summary_sg.rds"))
pr2_match_summary_primer_set_class <- readRDS(file = str_c("../output/pr2_match_", 
    gene_selected, "_mismatches_", max_mismatch, "_summary_class.rds"))


# Long form for Number of sequences

# This dataframe is used to re-order the bars correctly
pct_category_order <- data.frame(pct_category = c("ampli_pct", "fwd_pct", "rev_pct"), 
    pct_category_order = c(1, 3, 2))

pr2_match_summary_primer_set_long <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "pct_category", 
    values_to = "pct_seq", cols = fwd_pct:ampli_pct) %>% left_join(pct_category_order)

8 Graphics

8.1 Common parameters

ncol = 6

fig3 <- list()

8.2 Amplicon length

8.2.1 Scatter

ggplot(pr2_match_final, aes(x = primer_label, y = ampli_size, group = primer_set_id, 
    color = as.factor(mismatch_number))) + geom_point(size = 3) + theme(axis.text.x = element_text(angle = 45, 
    hjust = 1)) + labs(x = "Primer set") + scale_color_viridis_d() + facet_wrap(vars(specific), 
    nrow = 2, scales = "free_x")

8.2.2 Average size

ggplot(pr2_match_final, aes(x = primer_label, y = ampli_size, group = primer_set_id, 
    fill = as.factor(mismatch_number))) + geom_boxplot(outlier.alpha = 0.3) + theme(axis.text.x = element_text(angle = 45, 
    hjust = 1)) + labs(x = "Primer set") + scale_fill_viridis_d() + facet_wrap(vars(specific), 
    nrow = 2, scales = "free_x")

8.3 Example with sets V4 and V9

8.3.1 Plot an example of amplicon distribution (Fig. 3)

for (one_primer_set in c(4, 29)) {
    
    if (one_primer_set == 4) {
        xmin = 570
        xmax = 610
        xmax2 = 2000
    } else {
        xmin = 130
        xmax = 190
        xmax2 = 1000
        
    }
    
    pr2_match_final_one <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% 
        filter(!(supergroup %in% c("Apusozoa", "Eukaryota_X", "Protoalveoalata")))
    
    primer_label <- str_replace_all(pr2_match_final_one$primer_label[1], "_", " ")
    
    g <- ggplot(pr2_match_final_one, aes(x = ampli_size)) + geom_density(fill = "blue", 
        alpha = 0.9) + xlab("Amplicon size") + ggtitle(str_c("Primer set - ", primer_label)) + 
        xlim(xmin, xmax)
    
    print(g)
    
    g <- ggplot(pr2_match_final_one, aes(x = ampli_size, fill = supergroup)) + geom_density(alpha = 0.9) + 
        theme_bw() + # theme(legend.position = 'none') +
    guides(fill = guide_legend(nrow = 3)) + theme(legend.position = "top", legend.box = "horizontal") + 
        scale_fill_viridis_d(option = "inferno") + labs(x = "Amplicon size (bp)", 
        y = "Density", fill = "Supergroup") + # ggtitle(str_c('Primer set - ', primer_label)) +
    xlim(xmin, xmax)
    
    print(g)
    fig3[[str_c(one_primer_set, " size_distri")]] <- g
    
    g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_boxplot(outlier.alpha = 0.3) + 
        theme_bw() + coord_flip() + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Supergroup") + ylab("Amplicon size (bp)") + ylim(0, xmax2)
    
    print(g)
    fig3[[str_c(one_primer_set, " size")]] <- g
    
    g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_violin() + 
        coord_flip() + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Supergroup") + ylab("Amplicon size (bp)")
    print(g)
    
}

8.3.2 Plot an example of percent amplification

for (one_primer_set in c(4, 29)) {
    
    pr2_match_summary_filtered <- pr2_match_summary_primer_set_sg %>% filter((n_seq > 
        20) & (primer_set_id == one_primer_set) & !(supergroup %in% c("Apusozoa", 
        "Eukaryota_X")))
    
    
    
    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, 
        aes(x = str_c(supergroup, " - n = ", n_seq), y = ampli_pct, fill = supergroup), 
        position = "dodge") + theme_bw() + # theme(legend.position = 'none') +
    guides(fill = guide_legend(nrow = 2)) + theme(legend.position = "top", legend.box = "horizontal") + 
        scale_fill_viridis_d(option = "inferno") + coord_flip() + labs(x = "% of sequences amplified", 
        y = "", legend = "Supergroup") + scale_y_continuous(minor_breaks = seq(0, 
        100, by = 10), breaks = seq(0, 100, by = 20), limits = c(0, 100))
    
    # ggtitle (str_c('Set - ', one_primer_set, ' - % amplified per Supergroup') )
    
    print(g)
    fig3[[str_c(one_primer_set, " pct")]] <- g
    
    g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(supergroup, 
        " - n = ", n_seq), y = ampli_size_mean), colour = "black") + coord_flip() + 
        geom_errorbar(aes(x = str_c(supergroup, " - n = ", n_seq), ymax = ampli_size_mean + 
            ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + xlab("Supergroup") + 
        ylab("Amplicon size (bp)")  # +
    # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle
    # (str_c('Set - ', one_primer_set, ' - Amplicon sizes') ) + geom_hline(yintercept
    # = c(450,550) , linetype= 2)
    
    print(g)
    
    
    
    
}

8.3.3 Plot an example of mismatches

for (one_primer_set in c(4, 29)) {
    
    pr2_mismatches <- pr2_match_summary_primer_set_sg %>% tidyr::pivot_longer(names_to = "mismatch_number", 
        values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
        select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 
        1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% 
        filter((n_seq > 20) & (primer_set_id == one_primer_set) & !(supergroup %in% 
            c("Apusozoa", "Eukaryota_X")))
    
    g <- ggplot(pr2_mismatches, aes(x = str_c(supergroup, " - n = ", n_seq), y = mismatch_pct, 
        group = primer_set_id, fill = as.factor(mismatch_number))) + geom_col() + 
        theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + 
        labs(x = "Supergroup", y = "% of sequences with mismatches", fill = "Mismatches") + 
        scale_fill_viridis_d(direction = -1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + 
        theme(legend.position = "top", legend.box = "horizontal") + coord_flip()
    
    print(g)
    
    fig3[[str_c(one_primer_set, "mismatches", sep = " ")]] <- g
    
    
    print(g)
    
    
}

8.3.4 Plot an example of mismatches positions

for (one_primer_set in c(4, 29)) {
    
    df <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% filter(!(supergroup %in% 
        c("Apusozoa", "Eukaryota_X")))
    
    g <- ggplot() + geom_histogram(data = filter(df, !is.na(fwd_mismatch_primer_position_3prime)), 
        aes(x = fwd_mismatch_primer_position_3prime, fill = supergroup), binwidth = 1, 
        stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + theme_bw() + 
        scale_x_continuous(minor_breaks = 0:30) + labs(y = "Density", x = "Position of mismatches from 3' end", 
        title = "fwd")
    
    print(g)
    fig3[[str_c(one_primer_set, "mismatches positions fwd", sep = " ")]] <- g
    
    g <- ggplot() + geom_histogram(data = filter(df, !is.na(rev_mismatch_primer_position_3prime)), 
        aes(x = rev_mismatch_primer_position_3prime, fill = supergroup), binwidth = 1, 
        stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + theme_bw() + 
        scale_x_continuous(minor_breaks = 0:30) + labs(y = "Density", x = "Position of mismatches from 3' end", 
        title = "rev")
    
    print(g)
    fig3[[str_c(one_primer_set, "mismatches positions rev", sep = " ")]] <- g
    
}

8.4 All Eukaryotes

Comments

  • Percent of sequences amplified
    • Logically, the % of seq amplified is always < min(% of sequences matching forwar, % of sequences matching reverse)
    • In general it is the reverse primer that causes problems.
    • Some primer sets do not amplify any sequence (11, 19, 20, 21)
    • For V9, primer set 30 reverse primer is in the ITS region which is not present in the PR2 sequences, so no amplification.
  • Amplicon sizes
    • Only 8 V4 primer sets are suitable for Illumina 2x250 (max amplicon size = 450 bp)
    • This is if we consider the mean… If we consider the variation around the mean then, only 3 suitable for 2x250
    • Five more V4 primer sets are suitable for Illumina 3x250 (max amplicon size = 550 bp)

8.4.1 Plot only V4 and V9

fig1 <- list()

for (one_region in gene_regions) {
    
    pr2_match_summary_primer_set_region_long <- pr2_match_summary_primer_set_long %>% 
        filter(gene_region == one_region) %>% # Remove the group specific primers
    filter(specific == "general") %>% filter(pct_category %in% c("ampli_pct", "fwd_pct", 
        "rev_pct"))
    
    pr2_match_summary_primer_set_region <- pr2_match_summary_primer_set %>% filter(gene_region == 
        one_region) %>% # Remove the group specific primers)
    filter(specific == "general")
    
    pr2_match_region <- pr2_match_final %>% filter(gene_region == one_region) %>% 
        # Remove the group specific primers
    filter(specific == "general")
    
    # % Ampli
    
    g <- ggplot(pr2_match_summary_primer_set_region_long) + geom_col(aes(x = primer_label, 
        y = pct_seq, fill = fct_reorder(pct_category, pct_category_order)), width = 0.7, 
        position = "dodge") + theme_bw() + xlab("Primer set") + ylab("% of sequences amplified") + 
        scale_fill_manual(name = "", values = c(ampli_pct = "black", fwd_pct = "grey80", 
            rev_pct = "grey40"), labels = c("Amplicons", "Primer rev", "Primer fwd")) + 
        ggtitle(str_c(one_region, " - Percentage of sequences recovered")) + theme(axis.text.x = element_text(angle = 0, 
        hjust = 1)) + ylim(0, 100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + 
        theme(legend.position = "top", legend.box = "horizontal")
    
    print(g)
    
    # Size
    
    fig1[[str_c(one_region, "pct", sep = " ")]] <- g
    
    
    g <- ggplot(filter(pr2_match_summary_primer_set_region, !is.nan(ampli_size_mean))) + 
        geom_point(aes(x = primer_label, y = ampli_size_mean), colour = "black") + 
        geom_errorbar(aes(x = primer_label, ymax = ampli_size_mean + ampli_size_sd, 
            ymin = ampli_size_mean - ampli_size_sd)) + theme_bw() + xlab("Primer set") + 
        ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    ggtitle(str_c(one_region, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively")) + 
        geom_hline(yintercept = c(450, 550), linetype = c(2, 3)) + ylim(0, 850) + 
        theme(axis.text.x = element_text(angle = 0, hjust = 1)) + coord_flip()
    
    print(g)
    
    fig1[[str_c(one_region, "size", sep = " ")]] <- g
    
    # Mismatches
    
    pr2_mismatches <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "mismatch_number", 
        values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
        select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 
        1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% 
        filter(gene_region == one_region) %>% # Remove the group specific primers
    filter(specific == "general")
    
    g <- ggplot(pr2_mismatches, aes(x = primer_label, y = mismatch_pct, group = primer_set_id, 
        fill = as.factor(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
        hjust = 0, vjust = 0)) + labs(x = "Primer set", y = "% of sequences with mismatches", 
        fill = "Mismatches") + scale_fill_viridis_d(direction = -1, alpha = 0.85) + 
        guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + 
        coord_flip()
    
    print(g)
    
    fig1[[str_c(one_region, "mismatches", sep = " ")]] <- g
    
    
}

8.4.2 Plot all with panel general and specific

for (specific_one in c("general", "specific")) {
    
    df <- pr2_match_summary_primer_set_long %>% filter(pct_category %in% c("ampli_pct", 
        "fwd_pct", "rev_pct")) %>% filter(specific == specific_one)
    
    g <- ggplot(df) + geom_col(aes(x = primer_label, y = pct_seq, fill = fct_reorder(pct_category, 
        pct_category_order)), width = 0.7, position = "dodge") + theme_bw() + xlab("Primer set") + 
        ylab("% of sequences amplified") + scale_fill_manual(name = "", values = c(ampli_pct = "black", 
        fwd_pct = "grey80", rev_pct = "grey40"), labels = c("Amplicons", "Primer rev", 
        "Primer fwd")) + ggtitle(str_c(specific_one, " - Percentage of sequences recovered")) + 
        theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + ylim(0, 
        100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", 
        legend.box = "horizontal")
    
    
    print(g)
    
    fig1[[str_c(specific_one, "pct", sep = " ")]] <- g
    
    df <- pr2_match_summary_primer_set %>% filter(!is.nan(ampli_size_mean)) %>% filter(specific == 
        specific_one)
    
    g <- ggplot(df) + geom_point(aes(x = primer_label, y = ampli_size_mean), colour = "black") + 
        geom_errorbar(aes(x = primer_label, ymax = ampli_size_mean + ampli_size_sd, 
            ymin = ampli_size_mean - ampli_size_sd)) + theme_bw() + xlab("Primer set") + 
        ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    ggtitle(str_c(specific_one, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively")) + 
        geom_hline(yintercept = c(450, 550), linetype = c(2, 3)) + ylim(0, 850) + 
        theme(axis.text.y = element_text(angle = 0, hjust = 0)) + coord_flip()
    
    print(g)
    
    fig1[[str_c(specific_one, "size", sep = " ")]] <- g
    
}

8.5 Mismatches

8.5.1 Number of mismatches

for (specific_one in c("general", "specific")) {
    
    pr2_mismatches <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "mismatch_number", 
        values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
        select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 
        1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% 
        filter(specific == specific_one)
    
    g <- ggplot(pr2_mismatches, aes(x = primer_label, y = mismatch_pct, group = primer_set_id, 
        fill = as.factor(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
        hjust = 0, vjust = 0)) + labs(x = "Primer set", y = "% of sequences with mismatches", 
        fill = "Mismatches") + scale_fill_viridis_d(direction = -1, alpha = 0.85) + 
        guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + 
        coord_flip()
    
    print(g)
    
    fig1[[str_c(specific_one, "mismatches", sep = " ")]] <- g
    
    
}

8.5.2 Position of mismatches

ggplot(pr2_match_final, aes(x = primer_label)) + geom_boxplot(aes(y = fwd_mismatch_primer_position_3prime), 
    outlier.alpha = 0.3, color = "blue") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
    vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, breaks = seq(0, 
    25, by = 5), limits = c(0, 25)) + labs(x = "Primer set", y = "Position of mismatches", 
    title = "Primer fwd") + facet_wrap(vars(specific), nrow = 2, scales = "free_y")

ggplot(pr2_match_final, aes(x = primer_label)) + geom_boxplot(aes(y = rev_mismatch_primer_position_3prime), 
    outlier.alpha = 0.3, color = "red") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
    vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, breaks = seq(0, 
    25, by = 5), limits = c(0, 25)) + labs(x = "Primer set", y = "Position of mismatches", 
    title = "Primer rev") + facet_wrap(vars(specific), nrow = 2, scales = "free_y")

ggplot(filter(pr2_match_final, !is.na(rev_mismatch_primer_position_3prime))) + geom_histogram(aes(x = rev_mismatch_primer_position_3prime), 
    color = "red", binwidth = 1, stat = "density") + theme_bw() + scale_x_continuous(minor_breaks = 0:30, 
    breaks = seq(0, 30, by = 5), limits = c(0, 30)) + labs(y = "Primer set", x = "Position of mismatches", 
    title = "Primer rev") + facet_wrap(vars(primer_label), ncol = ncol)

## By supergroup

Comments

  • Excavata have a very different patterns from the rest of the group. They are not amplified by quite a few primer sets. They have also bigger amplicons
  • Some groups exhibit higher variability in amplicon size (e.g Chlorophyta)

8.5.3 % of Ampli and Amplicon size

fig_supergroup <- list()

for (specific_one in c("general", "specific")) {
    
    pr2_match_summary_primer_set_sg_region <- pr2_match_summary_primer_set_sg %>% 
        filter(specific == specific_one) %>% filter((n_seq > 20)) %>% filter(!(supergroup %in% 
        c("Apusozoa", "Eukaryota_X")))
    
    
    
    g <- ggplot(pr2_match_summary_primer_set_sg_region) + geom_col(aes(x = supergroup, 
        y = ampli_pct, fill = supergroup), position = "dodge") + theme_bw() + coord_flip() + 
        ylab("% of sequences amplified") + xlab("Supergroup") + ggtitle(str_c(specific_one, 
        " primers")) + scale_fill_viridis_d(option = "inferno") + ylim(0, 100) + 
        facet_wrap(~primer_label, scales = "fixed", ncol = ncol) + guides(fill = guide_legend(nrow = 1)) + 
        theme(legend.position = "top", legend.box = "horizontal") + theme(legend.position = "none")
    
    print(g)
    
    list_label <- str_c("pct", specific_one, sep = " ")
    fig_supergroup[[list_label]] <- g
    
    
    
    g <- ggplot(filter(pr2_match_summary_primer_set_sg_region, !is.nan(ampli_size_mean))) + 
        geom_point(aes(x = supergroup, y = ampli_size_mean), colour = "black") + 
        theme_bw() + coord_flip() + geom_errorbar(aes(x = supergroup, ymax = ampli_size_mean + 
        ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + xlab("Supergroup") + 
        ylab("Amplicon size (bp)") + ggtitle(str_c(specific_one, " primers")) + geom_hline(yintercept = c(450, 
        550), linetype = 2) + facet_wrap(~primer_label, scales = "fixed", ncol = ncol)
    
    print(g)
    
    list_label <- str_c("size", specific_one, sep = " ")
    
    fig_supergroup[[list_label]] <- g
    
}

8.5.4 Number of mismatches

pr2_mismatches_sg <- pr2_match_summary_primer_set_sg %>% tidyr::pivot_longer(names_to = "mismatch_number", 
    values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
    select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 
    1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% 
    filter((n_seq > 20)) %>% filter(!(supergroup %in% c("Apusozoa", "Eukaryota_X")))

for (specific_one in c("general", "specific")) {
    
    pr2_mismatches_sg_one <- pr2_mismatches_sg %>% filter(specific == specific_one)
    
    g <- ggplot(pr2_mismatches_sg_one, aes(x = supergroup, y = mismatch_pct, fill = fct_rev(mismatch_number))) + 
        geom_col() + theme_bw() + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + 
        labs(x = "Group", y = "% of sequences with mismatches", title = str_c(specific_one, 
            " primers"), fill = "Mismatches") + scale_fill_viridis_d(direction = 1, 
        alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", 
        legend.box = "horizontal") + coord_flip() + facet_wrap(vars(primer_label), 
        ncol = ncol)
    
    print(g)
    
    fig_supergroup[[str_c("mismatches", specific_one, sep = " ")]] <- g
}

8.6 By class for autotrophs

fig_class <- list()

for (specific_one in c("general", "specific")) {
    
    pr2_match_summary_filtered_one <- pr2_match_summary_primer_set_class %>% filter(n_seq > 
        20) %>% filter(division %in% c("Haptophyta", "Dinoflagellata", "Chlorophyta", 
        "Ochrophyta", "Cryptophyta")) %>% filter(specific == specific_one)
    
    
    g <- ggplot(pr2_match_summary_filtered_one) + geom_col(aes(x = str_c(str_trunc(division, 
        20, ellipsis = ""), "-", class), y = ampli_pct, fill = class), position = "dodge") + 
        theme_bw() + coord_flip() + ylab("% of sequences amplified") + xlab("Class") + 
        theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + scale_fill_viridis_d(option = "inferno") + 
        ylim(0, 100) + facet_wrap(vars(primer_label), scales = "fixed", ncol = ncol) + 
        guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + 
        theme(legend.position = "none")
    
    print(g)
    
    list_label <- str_c("pct", specific_one, sep = " ")
    
    fig_class[[list_label]] <- g
    
    g <- ggplot(filter(pr2_match_summary_filtered_one, !is.nan(ampli_size_mean))) + 
        geom_point(aes(x = str_c(str_trunc(division, 3, ellipsis = ""), "-", class), 
            y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(str_trunc(division, 
        3, ellipsis = ""), "-", class), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - 
        ampli_size_sd)) + theme_bw() + coord_flip() + theme(axis.text.y = element_text(angle = 0, 
        hjust = 0, vjust = 0)) + xlab("Class") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle
    # (str_c('Set -', one_primer_set, ' - Amplicon size - Lines correspond to limits
    # for Illumina 2x250 and 2x300 respectively') ) +
    geom_hline(yintercept = c(450, 550), linetype = 2) + facet_wrap(~primer_label, 
        scales = "fixed", ncol = ncol)
    
    print(g)
    
    list_label <- str_c("size", specific_one, sep = " ")
    
    fig_class[[list_label]] <- g
}

9 Specific analyses

9.1 Specific et sets for Opisthokonta

  • 35 UnNonMet
  • 16 Piredda
  • 17 Comeau
for (one_primer_set in c(16, 17, 35)) {
    
    pr2_match_summary_filtered <- filter(pr2_match_summary_primer_set_class, (n_seq > 
        20) & (supergroup %in% c("Opisthokonta")) & (primer_set_id == one_primer_set))
    
    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, 
        aes(x = str_c(division, "-", class, " - n= ", n_seq), y = ampli_pct), fill = "grey", 
        position = "dodge") + theme(axis.text.y = element_text(angle = 0, hjust = 0, 
        vjust = 0)) + theme_bw() + coord_flip() + ylab("% of sequences amplified") + 
        xlab("Class") + ggtitle(str_c("Set -", one_primer_set, " - % amplified per Class"))
    
    print(g)
    
    g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(division, 
        "-", class, " - n= ", n_seq), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(division, 
        "-", class, " - n= ", n_seq), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - 
        ampli_size_sd)) + theme(axis.text.y = element_text(angle = 0, hjust = 0, 
        vjust = 0)) + coord_flip() + xlab("Class") + ylab("Amplicon size (bp)") + 
        # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    ggtitle(str_c("Set -", one_primer_set, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively")) + 
        geom_hline(yintercept = c(450, 550), linetype = 2)
    
    print(g)
    
}

10 Tables

10.1 Table 1 and Supplementary tables

if (gene_selected == "18S_rRNA") file_name_xls = "../output/primers 18S.xlsx"

sheets = list(`Table 1` = primer_sets, `Summary Eukaryotes` = pr2_match_summary_primer_set, 
    `Summary Supergroup` = pr2_match_summary_primer_set_sg, `Summary Class` = pr2_match_summary_primer_set_class)


excel_style <- openxlsx::createStyle(numFmt = "0.00")
openxlsx::write.xlsx(sheets, file = file_name_xls, zoom = 80, firstActiveRow = 2, 
    firstActiveCol = 6, colWidths = "auto")

11 Figures

11.1 Fig. 1 - V4 and V9 - Amplification % and size

legend_pct <- cowplot::get_legend( fig1[["V4 pct"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(0, 0, 0, 20))
                    )

legend_mismatches <- cowplot::get_legend( fig1[["V4 mismatches"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(0, 0, 0, 20))
                    )

fig_1 <- cowplot::plot_grid(legend_pct, NULL,legend_mismatches,
                            fig1[["V4 pct"]]  +ggtitle("")+ theme(legend.position="none") + ylab(""),
                            fig1[["V4 mismatches"]] +ggtitle("")+ theme(legend.position="none") + xlab("") + ylab(""), 
                            fig1[["V4 size"]] +ggtitle("") + xlab("") + ylab(""),
                            fig1[["V9 pct"]]  +ggtitle("")+ theme(legend.position="none"),  
                            fig1[["V9 mismatches"]] +ggtitle("")+ theme(legend.position="none") + xlab(""), 
                            fig1[["V9 size"]] +ggtitle("") + xlab(""), 
                   labels = c("","","","A","","", "B","",""), label_size = 22, label_x = 0.05,
                   ncol = 3, nrow = 3, align = "hv", 
                   rel_heights = c(1,20,6), rel_widths = c(12,12,10) )
fig_1

ggsave(plot= fig_1 , filename="../figs/fig_pct_sizes_mismatches_V4_V9.pdf",
           width = 12 , height = 12, scale=1.75, units="cm", useDingbats=FALSE)

11.2 Fig. 1 - All - Amplification % and size and mismatches

legend_pct <- cowplot::get_legend( fig1[["general pct"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(0, 0, 0, 20))
                    )

legend_mismatches <- cowplot::get_legend( fig1[["general mismatches"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(0, 0, 0, 20))
                    )

fig_1 <- cowplot::plot_grid(legend_pct, legend_mismatches, NULL,
                            fig1[["general pct"]]  +ggtitle("")+ theme(legend.position="none") + ylab(""),
                            fig1[["general mismatches"]] +ggtitle("")+ theme(legend.position="none") + xlab("") + ylab(""), 
                            fig1[["general size"]] +ggtitle("") + xlab("") + ylab(""),
                            fig1[["specific pct"]]  +ggtitle("")+ theme(legend.position="none"), 
                            fig1[["specific mismatches"]] +ggtitle("")+ theme(legend.position="none") + xlab(""), 
                            fig1[["specific size"]] +ggtitle("") + xlab(""),
                   labels = c("","","", "A","","", "B","",""), 
                   label_size = 12, label_x = 0.05,
                    ncol = 3, nrow = 3, align = "hv", 
                   rel_heights = c(1,38,26), rel_widths = c(12,12,10) )
fig_1

ggsave(plot= fig_1 , filename="../figs/fig_pct_sizes_mismatches_all.pdf",
           width = 15 , height = 12, scale=1.75, units="cm", useDingbats=FALSE)

11.3 Fig. - Example of V4 and V9

legend_mismatches <- cowplot::get_legend( fig3[["4 mismatches"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(t = 20, r = 0, b = 20, l = 20))
                    )

legend_size_distri <- cowplot::get_legend( fig3[["4 size_distri"]] + 
                      # create some space to the left of the legend    
                      theme(legend.box.margin = margin(t = 20, r = 0, b = 20, l = 20))
                    )


fig_sub4 <- cowplot::plot_grid(fig3[["4 mismatches positions fwd"]] + theme(legend.position="none") + ggtitle("fwd")+ xlab(""), 
                               fig3[["4 mismatches positions rev"]] + theme(legend.position="none") + ggtitle("rev"),
                               nrow = 2,
                               align = "v" )

fig_sub29 <- cowplot::plot_grid(fig3[["29 mismatches positions fwd"]] + theme(legend.position="none") + ggtitle("fwd")+ xlab(""), 
                               fig3[["29 mismatches positions rev"]] + theme(legend.position="none") + ggtitle("rev"),
                               nrow = 2,
                               align = "v" )


fig <- cowplot::plot_grid(legend_mismatches, 
                            legend_size_distri, 
                            fig3[["4 mismatches"]] + theme(legend.position="none"),
                            fig_sub4, 
                            fig3[["4 size_distri"]] + theme(legend.position="none"), 
                            fig3[["4 size"]],
                            fig3[["29 mismatches"]]+ theme(legend.position="none"),
                            fig_sub29, 
                            fig3[["29 size_distri"]]+ theme(legend.position="none"), 
                            fig3[["29 size"]], 
                   labels = c("","","A - Primer V4 #4","","","", "B - Primer V9 #29","","",""), 
                   label_y = 1.05, hjust = -0.1, scale=0.95,
                   ncol = 2, nrow = 5, rel_heights = c(4,20,20,20,20),
                   align = "h" )

fig

ggsave(plot= fig , filename="../figs/fig_examples_V4_V9.pdf",
           width = 20 , height = 25, scale=1.8, units="cm", useDingbats=FALSE)

11.4 Fig. 3 - Supergroup analysis

row_height = 5

for (specific_one in c("general", "specific")) {
    
    
    fig_3 <- cowplot::plot_grid(fig_supergroup[[str_c("mismatches", specific_one, 
        sep = " ")]], NULL, fig_supergroup[[str_c("size", specific_one, sep = " ")]], 
        NULL, labels = c("A", "", "B", ""), ncol = 2, nrow = 2, align = "v", rel_widths = c(13, 
            0.2))
    
    fig_3
    
    height <- row_height * (ceiling(n_primers[[specific_one]]/ncol))
    
    ggsave(plot = fig_3, filename = str_c("../figs/fig_supergroup_", specific_one, 
        ".pdf"), width = 14, height = height, scale = 1.75, units = "cm", useDingbats = FALSE)
    
}

11.5 Fig. 4 - Class analysis

row_height = 3.5

for (specific_one in c("general", "specific")) {
    
    fig_4 <- cowplot::plot_grid(fig_class[[str_c("pct", specific_one, sep = " ")]], 
        ncol = 1, nrow = 1, align = "v")
    
    fig_4
    
    height <- row_height * (trunc(n_primers[[specific_one]]/ncol))
    
    ggsave(plot = fig_4, filename = str_c("../figs/fig_class_", specific_one, ".pdf"), 
        width = 7, height = height, scale = 3, units = "cm", useDingbats = FALSE)
    
    
}

Daniel Vaulot

03 07 2020